View results table from gene differential analysis.
# load de outputs
res.file<-paste0(workdir, "/analysed_data/differential_analysis_BMP4_CNTF.xlsx")
dge_res_sheets <- readxl::excel_sheets(res.file)[-c(1:4)]
dge_res_sheets <- data.frame(sheets = dge_res_sheets, row.names = dge_res_sheets)
dge_res_sheets_key <- readxl::read_xlsx(res.file, sheet = "Comparison Key")
# replace long sheet name from the keys
dge_res_sheets[dge_res_sheets_key$Key, ] <- dge_res_sheets_key$Comparison
# select comparisons of interest
dgeBulk <- imap(rownames(dge_res_sheets), ~readxl::read_xlsx(res.file, sheet = .x)) %>%
setNames(nm = dge_res_sheets$sheets)
# dgeBulk_sel <- dgeBulk[c("BMP4_diff_3w - progenitor|untreated",
# "CNTF_diff_3w - progenitor|untreated")]
dgeBulk_sel <- dgeBulk[c(dge_res_sheets_key$Comparison)]
# select relevant comparisons
dgeBulk_sel = dgeBulk_sel[-c(6,8,14)]# for each comparison, return a table of top hits
deTopList <- c()
for (i in 1:length(dgeBulk_sel)) {
# select item from list
de = dgeBulk_sel[[i]]
name=names(dgeBulk_sel)[i]
deTop = de %>% arrange(pvalue) %>% slice_head(n=100)
deTopList <- c(deTopList, list(deTop ))
}names(deTopList) <- names(dgeBulk_sel)
for (i in 1:length(deTopList)) {
name = names(deTopList)[[i]]
cat('\n')
cat('### ', name, ' \n')
cat('\n')
data.frame(deTopList[[i]])
cat('\n')
}# countSig <- function(i) {
# # select item from list
# de = dgeBulk_sel[[i]]
# name=names(dgeBulk_sel)[i]
#
# # count significant up and down DEGs
# deCount = de %>% filter(sig=="<= 0.05") %>% group_by(class) %>% summarise(count=n())
#
# return(deCount)
# }View counts of significant differential analysis hits.
# # load differentiated BMP4 and CNTF marker genes
# res.file<-paste0(workdir, "/analysed_data/differential_analysis_wo2W_BMP4_CNTF_wo2W.xlsx")
# dge_res_sheets <- readxl::excel_sheets(res.file)[-c(1:4)]
# dge_res_sheets <- data.frame(sheets = dge_res_sheets, row.names = dge_res_sheets)
# dge_res_sheets_key <- readxl::read_xlsx(res.file, sheet = "Comparison Key")
#
# # replace long sheet name from the keys
# dge_res_sheets[dge_res_sheets_key$Key, ] <- dge_res_sheets_key$Comparison
#
# # select comparisons of interest
# dgeBulk <- imap(rownames(dge_res_sheets), ~readxl::read_xlsx(res.file, sheet = .x)) %>%
# setNames(nm = dge_res_sheets$sheets)
# # dgeBulk_sel <- dgeBulk[c("BMP4_diff_3w - progenitor|untreated",
# # "CNTF_diff_3w - progenitor|untreated")]
# dgeBulk_sel <- dgeBulk[c(dge_res_sheets_key$Comparison)]
# dgeBulk_sel = dgeBulk_sel[-c(7)]View results table from gene differential analysis.
# countSig <- function(i) {
# # select item from list
# de = dgeBulk_sel[[i]]
# name=names(dgeBulk_sel)[i]
#
# # count significant up and down DEGs
# deCount = de %>% filter(sig=="<= 0.05") %>% group_by(class) %>% summarise(count=n())
#
# return(deCount)
# }View counts of significant differential analysis hits.
Visualising the effect sizes and significance for differential gene expression in differentiated and treated astrocytes.
# load de outputs
res.file<-paste0(workdir, "/analysed_data/differential_analysis_BMP4_CNTF.xlsx")
dge_res_sheets <- readxl::excel_sheets(res.file)[-c(1:4)]
dge_res_sheets <- data.frame(sheets = dge_res_sheets, row.names = dge_res_sheets)
dge_res_sheets_key <- readxl::read_xlsx(res.file, sheet = "Comparison Key")
# replace long sheet name from the keys
dge_res_sheets[dge_res_sheets_key$Key, ] <- dge_res_sheets_key$Comparison
# select comparisons of interest
dgeBulk <- imap(rownames(dge_res_sheets), ~readxl::read_xlsx(res.file, sheet = .x)) %>%
setNames(nm = dge_res_sheets$sheets)
# dgeBulk_sel <- dgeBulk[c("BMP4_diff_3w - progenitor|untreated",
# "CNTF_diff_3w - progenitor|untreated")]
dgeBulk_sel <- dgeBulk[c(dge_res_sheets_key$Comparison)]
# select relevant comparisons
dgeBulk_sel = dgeBulk_sel[-c(6,8,14)]plotl <- c()
for (i in 1:length(dgeBulk_sel)) {
# select relevant comparison
de = dgeBulk_sel[[i]]
name=names(dgeBulk_sel)[i]
# print(name)
# ensure empty gene symbols are replaced with ensembl IDs
de[is.na(de$symbol),]$symbol <- de[is.na(de$symbol),]$id
# add a column of NAs
de$diffexpressed <- NA
# if log2Foldchange > 0.0 and pvalue < 0.05, set as "UP"
de$diffexpressed[de$log2FoldChange > 0.0 & de$pvalue < 0.05] <- "UP"
# if log2Foldchange < -0.0 and pvalue < 0.05, set as "DOWN"
de$diffexpressed[de$log2FoldChange < -0.0 & de$pvalue < 0.05] <- "DOWN"
de$delabel <- NA
# label all up and downregulated genes
# de[!(is.na(de$diffexpressed)),]$delabel <- de[!(is.na(de$diffexpressed)),]$symbol
# label top up and downregulated genes
# largest -log10(p-value)
lowN = c("BMP4_diff_3w - BMP4_diff_2w|untreated" , "CNTF_diff_3w - CNTF_diff_2w|untreated")
highN = dge_res_sheets_key$Comparison[-c(3,10)]
if (name %in% lowN) {
N = 5 # number of genes - will depend on the number of significant gene distribution for the DE comparison
} else {N = 10}
sym = de[head(order(abs(-log10(de$pvalue)), decreasing = TRUE),N),]$symbol
de[head(order(abs(-log10(de$pvalue)), decreasing = TRUE),N),]$delabel <- paste0("italic('",sym,"')" )
# largest log2(FC), also significant
sigde <- de %>% dplyr::filter(pvalue < 0.05)
labelid <- sigde[head(order(abs(sigde$log2FoldChange), decreasing = TRUE),7),]$id
sym = de[de$id %in% labelid,]$symbol
de[de$id %in% labelid,]$delabel <- paste0("italic('",sym,"')" )
xlim = max(abs(de$log2FoldChange))+(max(abs(de$log2FoldChange))*0.3)
ylim = max(-log10(de$pvalue))+(max(-log10(de$pvalue))*1)
uplabel = de %>% filter(is.na(delabel)==FALSE) %>% dplyr::filter(diffexpressed=="UP")
dwlabel = de %>% filter(is.na(delabel)==FALSE) %>% dplyr::filter(diffexpressed=="DOWN")
vol_plot <- ggplot(data=de, aes(x=log2FoldChange, y=-log10(pvalue), col=diffexpressed, label=delabel)) +
geom_point(size = 1, alpha = 0.5) +
scale_x_continuous(expand = c(0, 0), limits=c(-xlim,xlim)) +
scale_y_continuous(expand = c(0, 0), limits=c(0,ylim)) +
theme_classic(base_size=18) +
geom_label_repel(
data = dwlabel,
aes(label = delabel),
size=6,
min.segment.length = unit(0.5, 'lines'),
nudge_y = 5.9,
# nudge_x = 7.9,
xlim = c(NA, -1),
max.overlaps=Inf,
parse = TRUE,
seed = 1) +
geom_label_repel(
data = uplabel,
aes(label = delabel),
size=6,
min.segment.length = unit(0.5, 'lines'),
nudge_y = 5.9,
# nudge_x = 7.9,
xlim = c(1, NA),
max.overlaps=Inf,
parse = TRUE,
seed = 1) +
scale_color_manual(values=c("#788feb", "#942025", "black")) +
# geom_vline(xintercept=c(-0.6, 0.6), col="black") +
geom_hline(yintercept=-log10(0.05), col="black", linetype = 2) +
ggtitle(name) +
theme(plot.title = element_text(hjust = 0.5, size=18),
legend.position = "none")
ggsave(file=paste0(workdir,"/volcano_plot/volcano_",name,".svg"), plot=vol_plot, width=6.5, height=6.5)
plotl <- c(plotl, list(vol_plot))
## extra: for gene-term plot querying
# de_genes <- de[!is.na(de$delabel),]$delabel
}# apply function to one select comparison
# i=3
# lapply(i, plot_volcano)# # load differentiated BMP4 and CNTF marker genes
# res.file<-paste0(workdir, "/analysed_data/differential_analysis_wo2W_BMP4_CNTF_wo2W.xlsx")
# dge_res_sheets <- readxl::excel_sheets(res.file)[-c(1:4)]
# dge_res_sheets <- data.frame(sheets = dge_res_sheets, row.names = dge_res_sheets)
# dge_res_sheets_key <- readxl::read_xlsx(res.file, sheet = "Comparison Key")
#
# # replace long sheet name from the keys
# dge_res_sheets[dge_res_sheets_key$Key, ] <- dge_res_sheets_key$Comparison
#
# # select comparisons of interest
# dgeBulk <- imap(rownames(dge_res_sheets), ~readxl::read_xlsx(res.file, sheet = .x)) %>%
# setNames(nm = dge_res_sheets$sheets)
# # dgeBulk_sel <- dgeBulk[c("BMP4_diff_3w - progenitor|untreated",
# # "CNTF_diff_3w - progenitor|untreated")]
# dgeBulk_sel <- dgeBulk[c(dge_res_sheets_key$Comparison)]
# dgeBulk_sel = dgeBulk_sel[-c(7)]# # create function for plot
# plot_volcano <- function(i) {
#
# # select relevant comparison
# de = dgeBulk_sel[[i]]
# name=names(dgeBulk_sel)[i]
# # print(name)
#
# # ensure empty gene symbols are replaced with ensembl IDs
# de[is.na(de$symbol),]$symbol <- de[is.na(de$symbol),]$id
#
# # add a column of NAs
# de$diffexpressed <- NA
# # if log2Foldchange > 0.0 and pvalue < 0.05, set as "UP"
# de$diffexpressed[de$log2FoldChange > 0.0 & de$pvalue < 0.05] <- "UP"
# # if log2Foldchange < -0.0 and pvalue < 0.05, set as "DOWN"
# de$diffexpressed[de$log2FoldChange < -0.0 & de$pvalue < 0.05] <- "DOWN"
# de$delabel <- NA
#
# # label all up and downregulated genes
# # de[!(is.na(de$diffexpressed)),]$delabel <- de[!(is.na(de$diffexpressed)),]$symbol
#
# # label top up and downregulated genes
# # largest -log10(p-value)
# lowN = c("BMP4_diff_3w - BMP4_diff_2w|untreated" , "CNTF_diff_3w - CNTF_diff_2w|untreated")
# highN = dge_res_sheets_key$Comparison[-c(3,10)]
# if (name %in% lowN) {
# N = 5 # number of genes - will depend on the number of significant gene distribution for the DE comparison
# } else {N = 10}
#
# sym = de[head(order(abs(-log10(de$pvalue)), decreasing = TRUE),N),]$symbol
# de[head(order(abs(-log10(de$pvalue)), decreasing = TRUE),N),]$delabel <- paste0("italic('",sym,"')" )
#
# # largest log2(FC), also significant
# sigde <- de %>% dplyr::filter(pvalue < 0.05)
# labelid <- sigde[head(order(abs(sigde$log2FoldChange), decreasing = TRUE),7),]$id
# sym = de[de$id %in% labelid,]$symbol
# de[de$id %in% labelid,]$delabel <- paste0("italic('",sym,"')" )
#
# xlim = max(abs(de$log2FoldChange))+(max(abs(de$log2FoldChange))*0.3)
# ylim = max(-log10(de$pvalue))+(max(-log10(de$pvalue))*0.5)
# ynud =
#
# uplabel = de %>% filter(is.na(delabel)==FALSE) %>% dplyr::filter(diffexpressed=="UP")
# dwlabel = de %>% filter(is.na(delabel)==FALSE) %>% dplyr::filter(diffexpressed=="DOWN")
#
#
# vol_plot <- ggplot(data=de, aes(x=log2FoldChange, y=-log10(pvalue), col=diffexpressed, label=delabel)) +
# geom_point(size = 1, alpha = 0.5) +
# scale_x_continuous(expand = c(0, 0), limits=c(-xlim,xlim)) +
# scale_y_continuous(expand = c(0, 0), limits=c(0,ylim)) +
# theme_classic(base_size=18) +
#
# geom_label_repel(
# data = dwlabel,
# aes(label = delabel),
# size=6,
# min.segment.length = unit(0.5, 'lines'),
# nudge_y = 5.9,
# # nudge_x = 7.9,
# xlim = c(NA, -1),
# max.overlaps=Inf,
# parse = TRUE,
# seed = 1) +
#
# geom_label_repel(
# data = uplabel,
# aes(label = delabel),
# size=6,
# min.segment.length = unit(0.5, 'lines'),
# nudge_y = 5.9,
# # nudge_x = 7.9,
# xlim = c(1, NA),
# max.overlaps=Inf,
# parse = TRUE,
# seed = 1) +
#
# scale_color_manual(values=c("#788feb", "#942025", "black")) +
# # geom_vline(xintercept=c(-0.6, 0.6), col="black") +
# geom_hline(yintercept=-log10(0.05), col="black", linetype = 2) +
# ggtitle(name) +
# theme(plot.title = element_text(hjust = 0.5, size=18),
# legend.position = "none")
#
# # ggsave(file=paste0(workdir,"/volcano_plot/volcano_",name,".svg"), plot=vol_plot, width=5.5, height=5.5)
#
# return(vol_plot)
#
# ## extra: for gene-term plot querying
# # de_genes <- de[!is.na(de$delabel),]$delabel
# }# apply function to one select comparison
# i=3
# lapply(i, plot_volcano)